### All packages
library(carData)
library(car)
library(effects)
library(ggeffects)  # provides ggpredict(), which the life-history models call
library(ggplot2)
library(ggpubr)
library(Matrix)
library(tidyr)
library(pscl)  
library(lsmeans)
# plyr must be attached before dplyr: attaching it afterwards masks dplyr verbs
# such as summarise(), so grouped summaries would ignore their groups
library(plyr)
library(dplyr)
library(limma)
library(edgeR)
library(preprocessCore)
library(gplots)
library(RColorBrewer)
library(statmod)
library(made4)
library(ade4)
library(reshape2)


#####################################################################################################################

### LIFE HISTORY RESPONSES ###

# Load the "Life history responses" data set. A file picker is used because the
# path is machine-specific; the file is assumed to be a CSV -- swap in a fixed
# path and/or a different reader if it is stored in another format.
life <- read.csv(file.choose())
head(life)
str(life)
# Offspring viability is coerced to numeric (it is the y variable of the viability plot)
life$Off_viability <- as.numeric(life$Off_viability)
# Bean type as a factor with Cowpea as the reference level
life$Bean <- as.factor(life$Bean)
life$Bean <- relevel(life$Bean, "Cowpea")


### Fecundity ###

### remove fecundity NAs from dataset
life_fec <- life %>% drop_na(Fecundity)
# Temperature is modelled as a categorical treatment
life_fec$Temperature <- as.factor(life_fec$Temperature)

### combined effects of bean type and temperature on fecundity
hist(life_fec$Fecundity)
fit <- glm(Fecundity ~ Temperature*Bean, data=life_fec, family="quasipoisson")
summary(fit)

## mean, sd and se of fecundity for every Temperature x Bean combination.
## dplyr:: is spelled out because plyr also defines summarise(), and the plyr
## version ignores group_by(). The standard error is computed as sd / sqrt(n),
## since no se() function is defined in the visible script.
means <- life_fec %>%
  dplyr::group_by(Temperature, Bean) %>%
  dplyr::summarise(
    mean_Fec = mean(Fecundity),
    sd_Fec = sd(Fecundity),
    se_Fec = sd(Fecundity) / sqrt(dplyr::n())
  ) %>%
  dplyr::ungroup()

means

### model predictions (with confidence intervals) for each Temperature x Bean cell
f <- ggpredict(fit, terms = c("Temperature", "Bean"))
f

###save CIs as separate file

### Load the confidence-interval table saved from the ggpredict() output.
### The plot layers below need the columns Bean, Lower and Upper, plus
### Temperature and Fecundity (inherited as x and y from the base plot).
### Assumed to be a CSV -- adjust the reader/path if stored differently.
life_fec_ci <- read.csv(file.choose())
life_fec_ci$Bean <- as.factor(life_fec_ci$Bean)
life_fec_ci$Bean <- relevel(life_fec_ci$Bean, "Cowpea")

### fecundity interaction plot
### The x axis is continuous, so plot from a copy of life_fec in which
### Temperature is numeric again (works whether it is currently a factor or a
### number); this avoids having to reload the spreadsheet by hand.
life_fec_plot <- life_fec
life_fec_plot$Temperature <- as.numeric(as.character(life_fec_plot$Temperature))

# NOTE(review): geom_line() takes x from f$x -- confirm ggpredict() returns
# Temperature on the same numeric scale (27 / 35) as the raw data.
C <- ggplot(life_fec_plot, aes(x=Temperature, y=Fecundity))+
  geom_jitter(aes(fill=Bean, color=Bean), size= 1.5, position=position_jitter(width=0.4, height=0), alpha=0.5)+  
  geom_line(data=f, aes(y=predicted, x=x, colour=group), linewidth=1) + 
  theme_classic()+
  labs(y="F1 Fecundity (No. eggs)") +
  scale_x_continuous(breaks = c(27,35))+
  theme(axis.text=element_text(size=10))+
  theme(axis.title.x=element_blank())+
  theme(axis.title.y=element_text(size=12, face="bold")) +
  scale_color_manual(values = c("#5056C7", "#C78350", "#69C750")) +
  scale_fill_manual(values = c("#5056C7", "#C78350", "#69C750"))

# overlay the confidence intervals (error bars) and their centre points
d <- C + geom_errorbar(data=life_fec_ci, aes(ymin=Lower, ymax=Upper, color=Bean), width=0.2, alpha=0.8, linewidth=0.7)+geom_point(data=life_fec_ci, shape=21, size=3.1, fill="white", aes(color=Bean))
plot(d)


### Developmental period ###

### remove values with 0 for dev period (of life_fec spreadsheet)
Dev <- subset(life_fec, life_fec$Dev_period>0)
# Temperature is modelled as a categorical treatment
Dev$Temperature <- as.factor(Dev$Temperature)

### combined effects of bean type and temperature on developmental period
fit <- glm(Dev_period ~ Temperature*Bean, data=Dev, family=Gamma)
summary(fit)

### mean, sd and se of developmental period per Temperature x Bean combination.
### dplyr:: is spelled out because plyr also defines summarise(), and the plyr
### version ignores group_by(). The standard error is computed as sd / sqrt(n),
### since no se() function is defined in the visible script.
means <- Dev %>%
  dplyr::group_by(Temperature, Bean) %>%
  dplyr::summarise(
    mean_Dev = mean(Dev_period),
    sd_Dev = sd(Dev_period),
    se_Dev = sd(Dev_period) / sqrt(dplyr::n())
  ) %>%
  dplyr::ungroup()

means

### model predictions (with confidence intervals) for each Temperature x Bean cell
dpredict <- ggpredict(fit, terms = c("Temperature", "Bean"))
dpredict

###save CIs as separate file

### Load the confidence-interval table saved from the ggpredict() output.
### The plot layers below need the columns Bean, Lower and Upper, plus
### Temperature and Dev_period (inherited as x and y from the base plot).
### Assumed to be a CSV -- adjust the reader/path if stored differently.
Dev_ci <- read.csv(file.choose())
Dev_ci$Bean <- as.factor(Dev_ci$Bean)
Dev_ci$Bean <- relevel(Dev_ci$Bean, "Cowpea")


### developmental period interaction plot
### The x axis is continuous, so plot from a copy of Dev in which Temperature
### is numeric again (works whether it is currently a factor or a number);
### this avoids having to reload the spreadsheet by hand.
Dev_plot <- Dev
Dev_plot$Temperature <- as.numeric(as.character(Dev_plot$Temperature))

# NOTE(review): geom_line() takes x from dpredict$x -- confirm ggpredict()
# returns Temperature on the same numeric scale (27 / 35) as the raw data.
G <- ggplot(Dev_plot, aes(x=Temperature, y=Dev_period))+
  geom_jitter(aes(fill=Bean, color=Bean), size= 1.5, position=position_jitter(width=0.4, height=0), alpha=0.5)+
  geom_line(data=dpredict, aes(y=predicted, x=x, colour=group), linewidth=1) + 
  theme_classic()+
  labs(y="F2 Developmental Period (Days)") +
  scale_x_continuous(breaks = c(27,35))+
  theme(axis.text=element_text(size=10))+
  theme(axis.title.x=element_blank())+
  theme(axis.title.y=element_text(size=12, face="bold")) +
  scale_color_manual(values = c("#5056C7", "#C78350", "#69C750")) +
  scale_fill_manual(values = c("#5056C7", "#C78350", "#69C750"))

# overlay the confidence intervals (error bars) and their centre points
h <- G + geom_errorbar(data=Dev_ci, aes(ymin=Lower, ymax=Upper, color=Bean), width=0.2, alpha=0.8, linewidth=0.7)+geom_point(data=Dev_ci, shape=21, size=3.1, fill="white", aes(color=Bean))
plot(h)


### Offspring viability ###

### Offspring viability uses only the rows that have a non-hatching egg count
new_life <- drop_na(life, Non_hatching_eggs)
new_life$Temperature <- as.factor(new_life$Temperature)


### Quasi-binomial GLM on (offspring, non-hatching eggs) counts: temperature x
### bean type interaction, with fecundity as a covariate
fit <- glm(
  cbind(No_offspring, Non_hatching_eggs) ~ Temperature * Bean + Fecundity,
  family = quasibinomial,
  data = new_life
)
summary(fit)

### Predicted viability (with confidence intervals) per Temperature x Bean cell
v <- ggpredict(fit, terms = c("Temperature", "Bean"))
v

###save CIs as separate file

### plotting interaction model with confidence intervals plotted
### Load the confidence-interval table saved from the ggpredict() output.
### The plot layers below need the columns Bean, Lower and Upper, plus
### Temperature and Off_viability (inherited as x and y from the base plot).
### Assumed to be a CSV -- adjust the reader/path if stored differently.
via_ci <- read.csv(file.choose())
via_ci$Bean <- as.factor(via_ci$Bean)
via_ci$Bean <- relevel(via_ci$Bean, "Cowpea")


### viability interaction plot
### The x axis is continuous, so plot from a copy of new_life in which
### Temperature is numeric again (works whether it is currently a factor or a
### number); this avoids having to reload the spreadsheet by hand.
new_life_plot <- new_life
new_life_plot$Temperature <- as.numeric(as.character(new_life_plot$Temperature))

# NOTE(review): geom_line() takes x from v$x -- confirm ggpredict() returns
# Temperature on the same numeric scale (27 / 35) as the raw data.
D <- ggplot(new_life_plot, aes(x=Temperature, y=Off_viability))+
  geom_jitter(aes(fill=Bean, color=Bean), size= 1.5, position=position_jitter(width=0.4, height=0), alpha=0.5)+
  geom_line(data=v, aes(y=predicted, x=x, colour=group), linewidth=1) + 
  theme_classic()+
  labs(y="F2 Viability (Adults/Eggs)") +
  scale_x_continuous(breaks = c(27,35))+
  theme(axis.text=element_text(size=10))+
  theme(axis.title.x=element_blank())+
  theme(axis.title.y=element_text(size=12, face="bold")) +
  scale_color_manual(values = c("#5056C7", "#C78350", "#69C750")) +
  scale_fill_manual(values = c("#5056C7", "#C78350", "#69C750"))

# overlay the confidence intervals (error bars) and their centre points
e <- D + geom_errorbar(data=via_ci, aes(ymin=Lower, ymax=Upper, color=Bean), width=0.2, alpha=0.8, linewidth=0.7)+geom_point(data=via_ci, shape=21, size=3.1, fill="white", aes(color=Bean))
plot(e)



#### plotting interaction models together ###

# Write the three interaction plots side by side into "Fig. 2.tiff"
tiff("Fig. 2.tiff", units="px", width=3500, height=1500, res=300)
figure <- ggarrange(d, h, e, labels=c("A", "B", "C"), nrow=1, label.x=0.9, common.legend=TRUE, legend="right")
# print() explicitly: without it the figure is only drawn when this line is
# auto-printed at the console, so a source()'d run would save an empty file
print(annotate_figure(figure, bottom=text_grob("Temperature (°C)", vjust=0, size=12, face="bold")))
dev.off()


####################################################################################################################


### RNA SEQUENCING ###

### Spearman rank correlations using LogFC of each gene within each treatment pairwise comparison ###

### Temperature (Cow 35 vs Cow 27) vs Resource (Chick 27 vs Cow 27) logFC correlation of 108 shared genes
# Each corr* table is picked interactively and assumed to be a CSV holding one
# logFC column per comparison (Temperature / Resource / Multidimensional);
# adjust the reader/path if the files are stored differently.

# Layers shared by the three scatter plots: points, a straight-line (lm) trend
# without confidence band, and the axis text styling.
corr_layers <- list(
  geom_point(color='#2980B9', size = 4),
  geom_smooth(method=lm, se=FALSE, fullrange=TRUE, color='#2C3E50'),
  theme_classic(),
  theme(axis.text.x = element_text(size = 12), axis.title.x = element_text(size=14, face="bold"), axis.text.y = element_text(size = 12), axis.title.y = element_text(size = 14, face="bold"))
)

### "Spearman temp vs res logFC" file
corr1 <- read.csv(file.choose())
str(corr1)

fit1 <- cor.test(x=corr1$Temperature, y=corr1$Resource, method = 'spearman')
fit1


A <- ggplot(corr1, aes(x=Temperature, y=Resource)) + 
  corr_layers +
  labs(x="Temperature change", y="Resource change")

### Temperature (Cow 35 vs Cow 27) vs Multidimensional (Chick 35 vs Cow 27) logFC correlation of 461 genes
### "Spearman temp vs multi logFC" file
corr2 <- read.csv(file.choose())

fit2 <- cor.test(x=corr2$Temperature, y=corr2$Multidimensional, method = 'spearman')
fit2

B <- ggplot(corr2, aes(x=Temperature, y=Multidimensional)) + 
  corr_layers +
  labs(x="Temperature change", y="Multi-dimensional change")

### Resource (Chick 27 vs Cow 27) vs Multidimensional (Chick 35 vs Cow 27) logFC correlation of 82 genes
### "Spearman res vs multi logFC" file
corr3 <- read.csv(file.choose())

fit3 <- cor.test(x=corr3$Resource, y=corr3$Multidimensional, method = 'spearman')
fit3

C <- ggplot(corr3, aes(x=Resource, y=Multidimensional)) + 
  corr_layers +
  labs(x="Resource change", y="Multi-dimensional change")

C


### plotting together

tiff("Supplementary figure S1.tiff", units="px", width=5500, height=2000, res=300)
figure <- ggarrange(A, B, C, labels=c("A", "B", "C"), nrow=1, label.x=0.95, font.label = list(size = 18))
# print() explicitly so the figure is also drawn when the script is source()'d
print(figure)
dev.off()


### edgeR analysis ###

# Load the "readcount_genename" table (read counts per sample plus gene
# annotation columns). Picked interactively and assumed to be a CSV -- adjust
# the reader/path if it is stored differently.
d <- read.csv(file.choose())
head(d)

colnames(d) <- c("gene_id","Co27.1","Co27.2","Co27.3","Co27.4","Co35.1","Co35.2","Co35.3","Co35.4","Ch27.1","Ch27.2","Ch27.3","Ch27.4", "Ch35.1","Ch35.2","Ch35.3","Ch35.4", "gene_name", "gene_chr", "gene_start", "gene_end", "gene_strand", "gene_length", "gene_biotype", "gene_description", "tf_family")

# columns 2-17 are the 16 sample count columns; gene IDs become the row names
dx <- d[,2:17]
rownames(dx) <- d[,1]

head(dx)


#assign factor levels based on column names
# characters 1-2 = bean ("Co"/"Ch"), 3-4 = temperature ("27"/"35")
Treat <- factor(substring(colnames(dx),1,2))
Treat <- relevel(Treat, ref="Co")
Treat
Temp <- factor(substring(colnames(dx),3,4))
Temp
TempTreat <- factor(substring(colnames(dx),1,4))
TempTreat

#normalization - 
### DEG list stores data in a simple list-based data object with a grouping factor (TempTreat)
y <- DGEList(counts = dx, group =TempTreat)
keep <- filterByExpr(y)
table(keep)
### keep only the genes flagged TRUE by filterByExpr(); library sizes are recomputed
y <- y[keep, , keep.lib.sizes=FALSE]

# method="none" leaves every normalisation factor at 1 (no TMM scaling).
# NOTE(review): confirm that skipping scaling normalisation is intended.
y <- calcNormFactors(y, method="none")
y$samples

#MDS plot shows the relative similarities of the 16 samples
plotMDS(y, col=rep(1:4, each=4))


# #look into consistency among samples (not sure what this is doing, skip for now)
### i.e., looking for sample variation within each group (Co27.1 to Co27.2 etc)
# design <- model.matrix(~Replicate+Replicate:Treat)
# logFC <- predFC(y,design, prior.count=1, dispersion=0.05)
# cor(logFC[,6:10])
# #

#define the design matrix for the test you want to run.
#I want to test for temperature x treatment effects:
design <- model.matrix(~Treat * Temp)
rownames(design) <- colnames(y) #cowpea and 27 are baseline.
design

#estimate the dispersion
### Only a single common dispersion (shared by all genes) is estimated here.
### NOTE(review): the original note described trended/genewise, robust
### estimation; confirm whether a common dispersion alone is intended.
y <- estimateGLMCommonDisp(y, design, verbose = TRUE)
y$common.dispersion



###plot biological coefficient of variation (square root of dispersion) against average log counts per million
###a way to measure the biological variation within a particular condition
plotBCV(y)

#fit genewise glms
###likelihood ratio test
fit <- glmFit(y, design)
# with no coef/contrast given, glmLRT() tests the last column of the design,
# which here is the Treat x Temp interaction term
lrt <- glmLRT(fit)
topTags(lrt)

# number of genes significant at a 5% Benjamini-Hochberg FDR
FDR <- p.adjust(lrt$table$PValue, method = "BH")
sum(FDR < 0.05)
# counts per million of the top-ranked genes
top <- rownames(topTags(lrt))
cpm(y)[top,]

# table of the 115 top-ranked genes
topTags <- topTags(lrt, n=115, adjust.method = "BH")

# attach each gene's per-sample CPM values and save the table
toptags2 <- merge(topTags, cpm(y), by="row.names")
head(toptags2)
length(toptags2$logFC)

write.csv(toptags2, "treatxtemp.csv")

# Reload the full "readcount_genename" table (picked interactively, assumed to
# be a CSV -- adjust the reader/path if it is stored differently).
d <- read.csv(file.choose())
head(d)
# columns 2-17 are the 16 sample count columns; gene IDs become the row names
dx <- d[,2:17]
rownames(dx) <- d[,1]

#throw out genes that are not expressed in any condition (i.e., are not expressed in at least 2 samples)
###don't use -- the filtered table df is not used below; bga() runs on the unfiltered dx
keep <- rowSums(cpm(dx)>0) >= 2
df <- dx[keep,]
table(keep)
 
# class label of each of the 16 samples, in column order
TreatTemp <- factor(c("Cowpea-27","Cowpea-27","Cowpea-27","Cowpea-27","Cowpea-35","Cowpea-35","Cowpea-35","Cowpea-35","Chickpea-27","Chickpea-27","Chickpea-27","Chickpea-27", "Chickpea-35","Chickpea-35","Chickpea-35","Chickpea-35"))

# between-group analysis (PCA-based) of the samples, saved to "bga.tiff"
tiff("bga.tiff", units="px", width=4000, height=2000, res=300)
k.bga<-bga(dx, type="pca", classvec=TreatTemp)
plot.bga(k.bga, axis1=1, axis2=2)
dev.off()

str(k.bga)
###find $eig

# eigenvalues of the ordination, expressed as percentages of their total
bga.eigen <- k.bga$ord$ord$eig
bga.eigen / sum(bga.eigen) * 100

###3 eigen values within TreatTemp vector
### 2532, 1608, 1098, total = 5238
###PC1 = (2532/5238)*100 = 48%
###PC2 = (1608/5238)*100 = 31%
###PC3 = (1098/5238)*100 = 21%


### Plotting edgeR functions ###

# Load the "treatxtemp with function" table (picked interactively, assumed to
# be a CSV -- adjust the reader/path if it is stored differently).
d <- read.csv(file.choose())
head(d)

library(reshape2)

d2 <- d[,c(2,24:29)] #be sure to include the column number that says the function
head(d2)


# Long format: the 3 id columns are kept and the remaining 4 selected columns
# are stacked, so each gene (Row.names) appears once per level of 'variable'
# (the treatment) and d3 should have 4x as many rows as d2.
d3 <- melt(d2, id.vars=c("Row.names", "Function", "Specifically"))

# N, mean, sd and se (sd / sqrt(N)) of the values per function x treatment
dat2s <- ddply(d3, c("Function","variable", "Specifically"), plyr::summarise, N= length(value), mean = mean(value), sd = sd(value), se = sd/sqrt(N))
# NA entries (e.g. sd/se of groups holding a single value) are set to 0
dat2s[is.na(dat2s)] <- 0 
pd <- position_dodge(0.1)

# Line plot of the mean (+/- se) per treatment for each specific function in
# `summary_df`, titled `title`. `dodge` is the position used for the error bars
# and lines.
plot_function_means <- function(summary_df, title, dodge = pd) {
  ggplot(summary_df, aes(x=variable, y=mean, colour = Specifically, group=Specifically)) +
    geom_errorbar(aes(ymin=mean-se, ymax=mean+se), colour = "black", width =.1, position = dodge) +
    geom_line(position = dodge) +
    geom_point(size = 3, position=position_jitterdodge()) +
    xlab("Treatment") +
    ylab("Mean counts per specific function") +
    guides(colour=guide_legend(title="Specific function")) +
    scale_x_discrete(labels=c("Cowpea 27°C", "Cowpea 35°C", "Chickpea 27°C", "Chickpea 35°C")) +
    theme_classic() +
    theme(axis.text=element_text(size=10)) +
    theme(axis.title.y=element_text(size=12, face="bold")) +
    theme(axis.title.x=element_text(size=12, face="bold")) +
    ggtitle(title) +
    theme(plot.title = element_text(hjust = 0.5))
}

# %in% is used for the subsets so that a missing 'Specifically' value simply
# drops the row instead of producing an all-NA row.
dat2s2 <- dat2s[dat2s$Specifically %in% c(
  "Carbohydrate metabolism", "Cellulose metabolism", "Hydrolysis",
  "Insulin transmembrane receptor", "Lactose metabolism", "Protein metabolism",
  "Hormone binding", "Inflammation", "Venom allergen", "Chitin binding",
  "Insecticide detoxification", "Sugar transporter", "Endopeptidase",
  "Transcription activation", "Unknown", "Lipid regulation", "Sugar metabolism",
  "Retroviral enzyme encoding gene"
), ]
a <- plot_function_means(dat2s2, "Multi-dimensional upregulation")
a

dat2s3 <- dat2s[dat2s$Specifically %in% c(
  "Fat metabolism", "Mitochondrial metabolism", "Inorganic phosphate transporter",
  "Transmembrane transporter", "Amino acid catabolism",
  "Carbohydrate metabolism and detoxification", "DNA transposition",
  "Purine metabolism"
), ]
b <- plot_function_means(dat2s3, "Control and Multi-dimensional upregulation")
b


tiff("Fig. 4.1.tiff", units="px", width=5000, height=2000, res=300)
mainfigure1 <- ggarrange(a, b, legend="right", labels=c("A", "B"), label.x=0.9)
# print() explicitly so the figure is also drawn when the script is source()'d
print(mainfigure1)
dev.off()

dat2s4 <- dat2s[dat2s$Specifically %in% c(
  "Transmembrane/signal transduction", "Detoxification", "DNA binding",
  "Cystein and glutathione homeostasis", "Ion transport",
  "Smooth muscle development"
), ]
c <- plot_function_means(dat2s4, "Multi-dimensional downregulation")
c

dat2s5 <- dat2s[dat2s$Specifically %in% c(
  "Synapsis", "Cellular processes", "Antifungal peptide",
  "Acetylcholine neurotransmission", "Neuronal development", "Spermatogenesis",
  "Sensory signalling", "Cellular component", "Electron transport",
  "Male sterility protein", "Nuclear chaperone", "Energy transport",
  "Insect lipoprotein uptake", "Transposable elements"
), ]
d1 <- plot_function_means(dat2s5, "Temperature upregulation")
d1

dat2s6 <- dat2s[dat2s$Specifically %in% c(
  "DNA repair", "Multifunctional", "Reverse transcription", "Redox signalling"
), ]
e <- plot_function_means(dat2s6, "Temperature downregulation")
e

tiff("Fig. 4.2.tiff", units="px", width=8000, height=3000, res=300)
mainfigure2 <- ggarrange(c, d1, e, legend="right", labels=c("C", "D", "E"), nrow=1, label.x=0.9)
# print() explicitly so the figure is also drawn when the script is source()'d
print(mainfigure2)
dev.off()



### Hypergeometric tests of PFAM annotation over-representation ###

### testing whether gene functions are more common in the 115 edgeR genes (treatxtemp with function.csv) than in the 21259 DE genes (readcount_genename.csv)

#### Multi-dimensional (Figure A)

# Every call below returns the hypergeometric point probability P(X = x) of
# finding exactly x genes with a given annotation among the k = 115 edgeR
# genes, when m of the 21259 genes carry that annotation. n (the genes without
# the annotation) is written as 21259 - m so that m + n always equals the gene
# total.
# NOTE(review): an over-representation p-value is normally the upper tail,
# phyper(x - 1, m, n, k, lower.tail = FALSE); confirm that the point
# probability is what is wanted here.

##chitin binding
dhyper(x = 2, m = 41, n = 21259 - 41, k = 115)
##p=0.02, so yes it is more common in the 115 edgeR genes


##cathepsin L endopeptidase
dhyper(x = 1, m = 11, n = 21259 - 11, k = 115)
##p=0.06, so marginally likely to find this gene more in the 115 edgeR gene set


##cytochrome P450 (insecticide detox)
dhyper(x = 1, m = 2, n = 21259 - 2, k = 115)
##p=0.01, so yes it is more common in the 115 edgeR genes


##esterase B1 carboxylesterase (insecticide detox)
dhyper(x = 1, m = 3, n = 21259 - 3, k = 115)
##p=0.02, so yes it is more common in the 115 edgeR genes


##ABC-2 family transporter protein (insecticide detox)
dhyper(x = 1, m = 4, n = 21259 - 4, k = 115)
##p=0.02, so yes it is more common in the 115 edgeR genes


##trehalose (sugar transporter)
dhyper(x = 4, m = 25, n = 21259 - 25, k = 115)
##p=9.21e-06, so yes it is more common in the 115 edgeR genes


##amylase (carbohydrate metabolism)
dhyper(x = 1, m = 2, n = 21259 - 2, k = 115)
##p=0.01, so yes it is more common in the 115 edgeR genes


##xylulose reductase (carbohydrate metabolism)
dhyper(x = 1, m = 4, n = 21259 - 4, k = 115)
##p=0.02, so yes it is more common in the 115 edgeR genes


##Cytosolic beta-glucosidase (cellulose metabolism)
dhyper(x = 1, m = 2, n = 21259 - 2, k = 115)
##p=0.01, so yes it is more common in the 115 edgeR genes


##Protein metabolism
dhyper(x = 2, m = 3, n = 21259 - 3, k = 115)
##p=8.7e-05, so yes it is more common in the 115 edgeR genes

dhyper(x = 1, m = 6, n = 21259 - 6, k = 115)
##p=0.03, so yes it is more common in the 115 edgeR genes

dhyper(x = 1, m = 2, n = 21259 - 2, k = 115)
##p=0.01, so yes it is more common in the 115 edgeR genes

dhyper(x = 1, m = 1, n = 21259 - 1, k = 115)
##p=0.005, so yes it is more common in the 115 edgeR genes


##Sugar metabolism
dhyper(x = 1, m = 1, n = 21259 - 1, k = 115)
##p=0.005, so yes it is more common in the 115 edgeR genes


##Hormone binding
dhyper(x = 2, m = 33, n = 21259 - 33, k = 115)
##p=0.01, so yes it is more common in the 115 edgeR genes


##Hydrolysis
dhyper(x = 5, m = 15, n = 21259 - 15, k = 115)
##p=1.2e-08, so yes it is more common in the 115 edgeR genes


##Inflammation
# n was previously entered as 21247, which with m = 18 gives a total of 21265
# rather than 21259.
# NOTE(review): m = 18 is assumed to be the correct value -- confirm.
dhyper(x = 4, m = 18, n = 21259 - 18, k = 115)
##p=2.3e-06, so yes it is more common in the 115 edgeR genes


##Insulin transmembrane receptor
dhyper(x = 1, m = 1, n = 21259 - 1, k = 115)
##p=0.005, so yes it is more common in the 115 edgeR genes


##Lactose metabolism
dhyper(x = 1, m = 1, n = 21259 - 1, k = 115)
##p=0.005, so yes it is more common in the 115 edgeR genes

dhyper(x = 2, m = 3, n = 21259 - 3, k = 115)
##p=8.7e-05, so yes it is more common in the 115 edgeR genes


##Lipid regulation
dhyper(x = 1, m = 1, n = 21259 - 1, k = 115)
##p=0.005, so yes it is more common in the 115 edgeR genes


##Retroviral enzyme encoding gene
dhyper(x = 1, m = 3, n = 21259 - 3, k = 115)
##p=0.02, so yes it is more common in the 115 edgeR genes


##Transcription activation
dhyper(x = 1, m = 14, n = 21259 - 14, k = 115)
##p=0.07, so marginally more common in the 115 edgeR genes


##Venom allergen
dhyper(x = 1, m = 8, n = 21259 - 8, k = 115)
##p=0.04, so yes it is more common in the 115 edgeR genes



###other paragraph genes of interest

### Cellular component (Fig D)
# Point probability of exactly x annotated genes among the 115 edgeR genes;
# n is written as (21259 - m), the genes without the annotation.

## Component of nuclear pore complex (Cellular component)
dhyper(x = 1, m = 2, n = 21259 - 2, k = 115)
# p = 0.01 -> more common in the 115 edgeR genes

## Nuclear envelope spectrin repeat proteins that bind to actin filaments (Cellular component)
dhyper(x = 1, m = 2, n = 21259 - 2, k = 115)
# p = 0.01 -> more common in the 115 edgeR genes

## Regulate cell-cell adhesion through their extracellular domain; their cytosolic domains connect to the actin cytoskeleton by binding to catenins (Cellular component)
dhyper(x = 1, m = 2, n = 21259 - 2, k = 115)
# p = 0.01 -> more common in the 115 edgeR genes

## Subunit of laminin; laminin interacts with integrins as part of the cytoskeleton (Cellular component)
dhyper(x = 1, m = 4, n = 21259 - 4, k = 115)
# p = 0.02 -> more common in the 115 edgeR genes

## Erythrocyte (RBC) membrane protein (Cellular component)
dhyper(x = 1, m = 2, n = 21259 - 2, k = 115)
# p = 0.01 -> more common in the 115 edgeR genes

## Involved in regulation of microtubule pulling forces during mitotic movement of chromosomes, regulates neurotransmitter secretion (Cellular component)
dhyper(x = 1, m = 1, n = 21259 - 1, k = 115)
# p = 0.005 -> more common in the 115 edgeR genes

## Muscle/cytoskeleton (Cellular component) - myosin
dhyper(x = 1, m = 21, n = 21259 - 21, k = 115)
# p = 0.1 -> not more common in the 115 edgeR genes


## E3 ubiquitin-protein ligase (Cellular processes) (Figure D)
dhyper(x = 1, m = 1, n = 21259 - 1, k = 115)
# p = 0.005 -> more common in the 115 edgeR genes


## Gamma-glutamyltranspeptidase 1 (Cystein and glutathione homeostasis) (Figure C)
dhyper(x = 1, m = 6, n = 21259 - 6, k = 115)
# p = 0.03 -> more common in the 115 edgeR genes


## Ataxia telangiectasia and Rad3-related protein (DNA repair, Figure E)
dhyper(x = 1, m = 2, n = 21259 - 2, k = 115)
# p = 0.01 -> more common in the 115 edgeR genes


## Thioredoxin (redox signalling, Figure E)
dhyper(x = 1, m = 3, n = 21259 - 3, k = 115)
# p = 0.02 -> more common in the 115 edgeR genes




###all other

###Fig B
# Fig B functions: point probability of exactly x annotated genes among the
# 115 edgeR genes; n is written as (21259 - m), the genes without the annotation.

## Amino acid catabolism (Fig B)
dhyper(x = 1, m = 2, n = 21259 - 2, k = 115)
# p = 0.01 -> more common in the 115 edgeR genes


## DNA transposition (Fig B)
dhyper(x = 1, m = 6, n = 21259 - 6, k = 115)
# p = 0.03 -> more common in the 115 edgeR genes


## Fat metabolism (Fig B)
dhyper(x = 1, m = 10, n = 21259 - 10, k = 115)
# p = 0.05 -> more common in the 115 edgeR genes


## Inorganic phosphate transporter (Fig B)
dhyper(x = 1, m = 2, n = 21259 - 2, k = 115)
# p = 0.01 -> more common in the 115 edgeR genes


## Mitochondrial metabolism (Fig B)
dhyper(x = 1, m = 2, n = 21259 - 2, k = 115)
# p = 0.01 -> more common in the 115 edgeR genes


## Purine metabolism (Fig B)
dhyper(x = 1, m = 4, n = 21259 - 4, k = 115)
# p = 0.02 -> more common in the 115 edgeR genes

dhyper(x = 1, m = 1, n = 21259 - 1, k = 115)
# p = 0.005 -> more common in the 115 edgeR genes


## Transmembrane transporter (Fig B)
dhyper(x = 1, m = 3, n = 21259 - 3, k = 115)
# p = 0.02 -> more common in the 115 edgeR genes




###Fig C
# Fig C functions: point probability of exactly x annotated genes among the
# 115 edgeR genes; n is written as (21259 - m), the genes without the annotation.

## 15-hydroxyprostaglandin dehydrogenase [NADP(+)]; (detoxification) (Fig C)
dhyper(x = 1, m = 1, n = 21259 - 1, k = 115)
# p = 0.005 -> more common in the 115 edgeR genes


## DNA binding (Fig C)
dhyper(x = 1, m = 28, n = 21259 - 28, k = 115)
# p = 0.1 -> not more common in the 115 edgeR genes

dhyper(x = 1, m = 1, n = 21259 - 1, k = 115)
# p = 0.005 -> more common in the 115 edgeR genes


## Ion transport (Fig C)
dhyper(x = 1, m = 1, n = 21259 - 1, k = 115)
# p = 0.005 -> more common in the 115 edgeR genes


## Smooth muscle development (Fig C)
dhyper(x = 1, m = 1, n = 21259 - 1, k = 115)
# p = 0.005 -> more common in the 115 edgeR genes


## Transmembrane/signal transduction (Fig C)
dhyper(x = 1, m = 1, n = 21259 - 1, k = 115)
# p = 0.005 -> more common in the 115 edgeR genes


###Fig D

# Fig D functions: point probability of exactly x annotated genes among the
# 115 edgeR genes; n is written as (21259 - m), the genes without the annotation.

## Acetylcholine neurotransmission (Fig D)
dhyper(x = 1, m = 3, n = 21259 - 3, k = 115)
# p = 0.02 -> more common in the 115 edgeR genes


## Insect lipoprotein uptake (Fig D)
dhyper(x = 1, m = 31, n = 21259 - 31, k = 115)
# p = 0.1 -> not more common in the 115 edgeR genes


## Antifungal peptide (Fig D)
dhyper(x = 2, m = 16, n = 21259 - 16, k = 115)
# p = 0.003 -> more common in the 115 edgeR genes


## Electron transport (Fig D)
dhyper(x = 1, m = 2, n = 21259 - 2, k = 115)
# p = 0.01 -> more common in the 115 edgeR genes


## Energy transport (Fig D)
dhyper(x = 1, m = 1, n = 21259 - 1, k = 115)
# p = 0.005 -> more common in the 115 edgeR genes


## Male sterility protein (Fig D)
dhyper(x = 1, m = 20, n = 21259 - 20, k = 115)
# p = 0.1 -> not more common in the 115 edgeR genes

dhyper(x = 1, m = 2, n = 21259 - 2, k = 115)
# p = 0.01 -> more common in the 115 edgeR genes

dhyper(x = 1, m = 197, n = 21259 - 197, k = 115)
# p = 0.4 -> not more common in the 115 edgeR genes

dhyper(x = 1, m = 1, n = 21259 - 1, k = 115)
# p = 0.005 -> more common in the 115 edgeR genes


## Neuronal development (Fig D)
dhyper(x = 1, m = 1, n = 21259 - 1, k = 115)
# p = 0.005 -> more common in the 115 edgeR genes

dhyper(x = 1, m = 2, n = 21259 - 2, k = 115)
# p = 0.01 -> more common in the 115 edgeR genes


## Nuclear chaperone (Fig D)
dhyper(x = 2, m = 7, n = 21259 - 7, k = 115)
# p = 0.0006 -> more common in the 115 edgeR genes



## Sensory signalling (Fig D)
dhyper(x = 1, m = 1, n = 21259 - 1, k = 115)
# p = 0.005 -> more common in the 115 edgeR genes


## Spermatogenesis (Fig D)
dhyper(x = 1, m = 1, n = 21259 - 1, k = 115)
# p = 0.005 -> more common in the 115 edgeR genes


## Synapsis (Fig D)
dhyper(x = 1, m = 3, n = 21259 - 3, k = 115)
# p = 0.02 -> more common in the 115 edgeR genes


## Transposable elements (Fig D)
dhyper(x = 1, m = 5, n = 21259 - 5, k = 115)
# p = 0.03 -> more common in the 115 edgeR genes



##Fig E
# Fig E functions: point probability of exactly x annotated genes among the
# 115 edgeR genes; n is written as (21259 - m), the genes without the annotation.

## Multifunctional (Fig E)
dhyper(x = 1, m = 19, n = 21259 - 19, k = 115)
# p = 0.09 -> marginally common in the 115 edgeR genes

dhyper(x = 1, m = 2, n = 21259 - 2, k = 115)
# p = 0.01 -> more common in the 115 edgeR genes

dhyper(x = 1, m = 205, n = 21259 - 205, k = 115)
# p = 0.4 -> not more common in the 115 edgeR genes

dhyper(x = 1, m = 1, n = 21259 - 1, k = 115)
# p = 0.005 -> more common in the 115 edgeR genes


## Reverse transcription (Fig E)
dhyper(x = 1, m = 9, n = 21259 - 9, k = 115)
# p = 0.05 -> more common in the 115 edgeR genes
